Load libraries

library(signal)
library(tidyverse)
library(here)
library(lubridate)
library(dtplyr)
library(sf)
library(knitr)
library(mgcv)
library(future)
library(furrr)
library(progressr)
library(pracma)

Set a simple console progress bar

handlers("txtprogressbar") # Simple console progress bar

Supress package messages

suppressPackageStartupMessages({
  library(mgcv)
  library(nlme)
})

Load previously created objects

load(file = "objects/data_RS_S2_bands_indices.Rdata")
load(file = "objects/GAM_data_S2.Rdata")
load(file = "objects/smoothed_data_S2.Rdata")
load(file = "objects/monthly_avg_indices_S2.Rdata")

Load Resurvey db

db_Europa_allobs <- read_csv(
  here("data", "clean", "db_Europa_allobs.csv")) %>%
  select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
         EUNISa_2, EUNISa_2_descr) %>%
  mutate(PlotObservationID = factor(PlotObservationID),
         EUNISa_1 = factor(EUNISa_1), EUNISa_2 = factor(EUNISa_2))

Define printall function

printall <- function(tibble) {
  print(tibble, width = Inf)
  }

Read files with band data

I got these files using the GEE code prepared by Bea.

These files contain all observations in the ReSurvey database from 2016 onward. In order to avoid computation problems in GEE, biogeographical units that contain more than 4500 points have been subdivided in ArcGIS.

# Set the folder path
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2/Bands/all"

# List CSV files
csv_files <- list.files(folder_path, full.names = TRUE, recursive = TRUE)

# Function to extract biogeo and unit from the filename
extract_info <- function(filename) {
  first_word <- strsplit(filename, "_")[[1]][1]
  biogeo <- str_extract(first_word,
                        "^(ALP|ANA|ARC|ATL|BLACKSEA|BOR|CON|MACARONESIA|MED|PANONIA|STEPPIC)")
  unit <- str_remove(first_word, biogeo)
  if (is.na(unit) || unit == "") unit <- NA_character_
  list(biogeo = biogeo, unit = unit)
  }


# Define column types: force RSrvypl to character, others auto-detected
custom_col_types <- cols(
  RSrvypl = col_character(),
  RSrvyst = col_character(),
  default = col_guess()
)

# Read and process each file
data_list <- lapply(csv_files, function(file) {
  info <- extract_info(basename(file)) # Use only the filename
  
  # Read the file
  df <- read_csv(file, col_types = custom_col_types) %>%
    # Remove columns that give column type problems when combining data
    select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
    mutate(biogeo = info$biogeo, unit = info$unit)
  
  return(df)
  })

# Combine all data
data_RS_S2_bands <- bind_rows(data_list) %>%
  rename(PlotObservationID = PltObID)

# View the resulting tibble
print(data_RS_S2_bands)

# Counts per biogeo and unit
print(data_RS_S2_bands %>% count(biogeo, unit), n = 100)

Some checks

Check that the year in the date of the images is not different to the sampling year:

data_RS_S2_bands %>% dplyr::filter(year != year(date))

Check how many different images are for each observation, date and time:

data_RS_S2_bands %>% group_by(PlotObservationID, date, time_utc) %>%
  summarise(n_images = n_distinct(image_id), .groups = "drop") %>%
  count(n_images)

Average the bands

When there is more than one image for each point and day, average the values of the bands:

Calculate indices

Save:

Plot n_daytime:

data_RS_S2_bands_indices %>%
  group_by(PlotObservationID) %>%
  summarise(n_days = first(n_days)) %>% ungroup() %>%
  ggplot(aes(x = n_days)) + geom_histogram(color = "black", fill = "white") +
  theme_minimal()

Compute phenological metrics from models fitted to time series data

Function

HERE (already done in Landsat): Modify function and rerun everything from here (TBD)

Using GAMs, reweighting and 3 iterations.

Using both a change detection method (maximum slope) and a threshold method (50% amplitude) to calculate sos and eos.

Approach similar to https://doi.org/10.1016/j.jag.2020.102172 for GAM fitting and change detection method, and to https://www.mdpi.com/2072-4292/12/22/3738 fot threshold method.

Define function to compute phenology metrics using GAM fit and NDVI / EVI / SAVI:

compute_metrics_models <- function(df, index_cols = c("NDVI", "EVI", "SAVI")) {
  suppressPackageStartupMessages({
    library(mgcv)
    library(nlme)
    })
  
  plan(multisession)  # Set up parallel processing
  
  # Create a list of index-specific data frames
  index_dfs <- lapply(index_cols, function(index_col) {
    list(index_col = index_col, df = df %>%
           select(DOY, PlotObservationID, all_of(index_col)))
    })
  
  # Define the processing function for each index
  process_index <- function(index_data) {
    index_col <- index_data$index_col
    df_index <- index_data$df %>%
      filter(!is.na(.data[[index_col]])) %>%
      arrange(DOY)
    
    plot_id <- unique(df_index$PlotObservationID)

    if (nrow(df_index) < 10) {
      message("  Skipped: insufficient data (< 10 rows)")
      return(tibble(PlotObservationID = plot_id, index = index_col,
                    sos_slope = NA_real_, sos_threshold = NA_real_,
                    pos = NA_real_, eos_slope = NA_real_, 
                    eos_threshold = NA_real_, auc_slope = NA_real_,
                    auc_threshold = NA_real_, Vmax = NA_real_,
                    DOY = df_index$DOY, value = NA_real_))
    }
    
    # Replace early/late DOY values
    base_value_early <- mean(df_index %>% filter(DOY <= 50) %>% 
                               pull(index_col), na.rm = TRUE)
    base_value_late  <- mean(df_index %>% filter(DOY >= 315) %>% 
                               pull(index_col), na.rm = TRUE)

    df_index <- df_index %>%
      mutate(!!index_col := case_when(
        DOY <= 50 ~ base_value_early,
        DOY >= 315 ~ base_value_late,
        TRUE ~ .data[[index_col]]
      ))

    x <- df_index$DOY
    y <- df_index[[index_col]]
    weights <- rep(1, length(y))
    
    # GAM fit
    pred <- NULL
    for (i in 1:3) {
      gam_fit <- tryCatch({
        mgcv::bam(y ~ s(x, bs = "tp"),weights = weights)
        }, error = function(e) {
          message("  GAM fitting failed for ", plot_id, " - ", index_col, ": ", 
                  e$message)
          return(NULL)
          })
      if (is.null(gam_fit)) {
        return(tibble(
          PlotObservationID = plot_id,
          index = index_col,
          sos_slope = NA_real_,
          sos_threshold = NA_real_,
          pos = NA_real_,
          eos_slope = NA_real_, 
          eos_threshold = NA_real_, 
          auc_slope = NA_real_,
          auc_threshold = NA_real_, 
          Vmin_pre = NA_real_, 
          Vmin_post = NA_real_,
          Vmax = NA_real_, 
          u_sos = NA_real_, 
          u_eos = NA_real_,
          DOY = df_index$DOY,
          value = NA_real_))
        }
      
      pred <- tryCatch({
        predict(gam_fit, newdata = tibble(x = x))
        }, error = function(e) {
          message("Prediction failed for ", plot_id, " - ", index_col, ": ",
                  e$message)
          return(rep(NA_real_, length(x)))
          })
      
      idx_between <- which(x > 50 & x < 315 & !is.na(pred) & pred != 0)
      weights <- rep(1, length(y))
      weights[idx_between] <- (y[idx_between] / (pred[idx_between] + 1e-6))^4
      weights[weights > 1 | is.na(weights)] <- 1
      }
    
    # Compute metrics
    slope <- c(NA, diff(pred))
    idx <- which(x >= 50 & x <= 315)
    pos <- if (length(idx) > 0) x[idx][which.max(pred[idx])] else NA_real_

    sos_slope <- if (!is.na(pos)) {
      idx <- which(x < pos)
      if (length(idx) > 0) x[idx][which.max(slope[idx])] else NA_real_
    } else NA_real_

    eos_slope <- if (!is.na(pos)) {
      idx <- which(x > pos)
      if (length(idx) > 0) x[idx][which.min(slope[idx])] else NA_real_
    } else NA_real_

    integration_idx_slope <- which(x >= sos_slope & x <= 
                                     eos_slope & !is.na(pred))
    auc_slope <- if (length(integration_idx_slope) > 1) {
      sum(diff(x[integration_idx_slope]) * 
            zoo::rollmean(pred[integration_idx_slope], 2))
      } else NA_real_
    
    # Vmin antes y después del pico
    Vmin_pre <- if (!is.na(pos)) min(pred[x <= pos], na.rm = TRUE)else NA_real_
    Vmin_post <- if (!is.na(pos)) min(pred[x >= pos], na.rm = TRUE) else NA_real_
    Vmax <- max(pred, na.rm = TRUE)
    
    # Umbrales relativos
    p <- 0.5
    u_sos <- if (!is.na(Vmin_pre)) Vmin_pre + p * (Vmax - Vmin_pre) else NA_real_
    u_eos <- if (!is.na(Vmin_post)) Vmin_post + p * (Vmax - Vmin_post) else NA_real_
    
    # DOY donde se cruzan los umbrales
    sos_threshold <- if (!is.na(u_sos)) x[which(pred >= u_sos)[1]] else NA_real_
    eos_threshold <- if (!is.na(u_eos)) x[rev(which(pred >= u_eos))[1]] else NA_real_
    
    integration_idx_threshold <- which(x >= sos_threshold & 
                                         x <= eos_threshold & !is.na(pred))
    auc_threshold <- if (length(integration_idx_threshold) > 1) {
      sum(diff(x[integration_idx_threshold]) * 
            zoo::rollmean(pred[integration_idx_threshold], 2))
      } else NA_real_
    
    # 1. Predicciones por DOY
    fits_df <- tibble(
      PlotObservationID = unique(df_index$PlotObservationID),
      DOY = x,
      value = pred,
      index = index_col
      )
    
    # 2. Métricas resumen
    metrics_df <- tibble(
      PlotObservationID = unique(df_index$PlotObservationID),
      index = index_col,
      sos_slope = sos_slope,
      sos_threshold = sos_threshold,
      pos = pos,
      eos_slope = eos_slope,
      eos_threshold = eos_threshold,
      auc_slope = auc_slope,
      auc_threshold = auc_threshold,
      Vmin_pre = Vmin_pre,
      Vmin_post = Vmin_post,
      Vmax = Vmax,
      u_sos = u_sos,
      u_eos = u_eos
      )
    
    # 3. Unir por PlotObservationID, index
    final_df <- left_join(fits_df, metrics_df, 
                          by = c("PlotObservationID", "index"))
  }
  
  # Run in parallel
  results <- future_map(index_dfs, process_index, .progress = TRUE)
  results <- purrr::compact(results)  # removes NULLs
  if (length(results) == 0) return(tibble())  # or return(NULL)
  bind_rows(results)
}

Calculation

Apply the function with batch processing

plan(sequential)

Save

Look:

GAM_data

Save as an object:

Extract average values of indices per month

Extract average values of indices per month and AUC between March and October

extract_monthly_avg_indices <- function(
  GAM_data, 
  monthly_doys = list("01" = 1:31, "02" = 32:59, "03" = 60:90, "04" = 91:120, 
                      "05" = 121:151, "06" = 152:181, "07" = 182:212, 
                      "08" = 213:243, "09" = 244:273, "10" = 274:304,
                      "11" = 305:334, "12" = 335:365)) {
  
  monthly_df <- GAM_data %>%
    mutate(month = purrr::map_chr(DOY, function(doy) {
      month_name <- names(monthly_doys)[sapply(monthly_doys, 
                                               function(r) doy %in% r)]
      if (length(month_name) > 0) month_name else NA_character_
    })) %>%
    dplyr::filter(!is.na(month)) %>%
    group_by(PlotObservationID, index, month) %>%
    summarise(avg_value = mean(value, na.rm = TRUE), .groups = "drop") %>%
    mutate(avg_value = ifelse(is.infinite(avg_value), NA, avg_value)) %>%
    arrange(PlotObservationID, match(month, names(monthly_doys))) %>%
    pivot_wider(names_from = month, values_from = avg_value, 
                names_prefix = "avg_value_")
  
  # Calcular AUC entre marzo y octubre usando regla del trapecio
  months_auc <- c("03", "04", "05", "06", "07", "08", "09", "10")
  # DOY aproximado del centro de cada mes
  doy_midpoints <- c(75, 105, 135, 165, 195, 225, 255, 285)  
  
  monthly_df <- monthly_df %>%
    rowwise() %>%
    mutate(
      auc_mar_oct = {
        values <- c_across(all_of(paste0("avg_value_", months_auc)))
        if (any(is.na(values))) NA_real_ else sum(diff(doy_midpoints) *
                                                    zoo::rollmean(values, 2))
      }
    ) %>%
    ungroup()
  
  return(monthly_df)
}

Save as an object:

Assess time series quality

For the time series to be acceptable, it should have a reasonable number of time points, and these points should be distributed along almost all months (could be ok to miss the winter months).

In GAM data, check how many time points are there for each PlotObservationID, how many months, and which months are missing.

ts_quality <- GAM_data %>%
  # Filter only NDVI (all indices will have the same time points)
  dplyr::filter(index == "NDVI") %>%
  # Get month from DOY
  mutate(month = month(ymd("2020-01-01") + days(DOY - 1))) %>%
  # For each PlotObservationID
  group_by(PlotObservationID) %>%
  # Get the number of time points (days) and the number of months
  summarise(
    n_days = n_distinct(DOY),
    n_months = n_distinct(month),
    .groups = "drop"
  ) %>%
  left_join(GAM_data %>%
              # Filter only NDVI
              dplyr::filter(index == "NDVI") %>%
              # Get month from DOY
              mutate(month = month(ymd("2020-01-01") + days(DOY - 1))) %>%
              # Get unique values of PlotObservationID and month
              distinct(PlotObservationID, month) %>%
              # Add 1 as value
              mutate(value = 1) %>%
              # Reshape to wide format and add zeros when month is missing
              pivot_wider(
                names_from = month,
                names_prefix = "month",
                values_from = value,
                values_fill = 0),
            by = "PlotObservationID")

Histograms time points and n months:

ggplot(ts_quality, aes(x = n_days)) +
  geom_histogram(color = "black", fill = "white") +
  xlab("Number of time points (days) in the S2 time series") +
  theme_minimal()

ggplot(ts_quality, aes(x = n_months)) +
  geom_histogram(color = "black", fill = "white") +
  xlab("Number of months in the S2 time series") +
  theme_minimal()

Count how many PlotObservationIDs have missing data (value 0) for each month:

obs_missing_month <- ts_quality %>%
  summarise(across(starts_with("month"), ~ sum(.x == 0))) %>%
  pivot_longer(cols = everything(), names_to = "month", values_to = "nobs_missing")

ggplot(obs_missing_month %>%
         mutate(month = factor(month, levels = paste0("month", 1:12))), 
       aes(x = month, y = nobs_missing)) + geom_bar(stat = "identity") +
  ylab("Number of PlotObservationID with missing data") +
  ggtitle("Missing data in S2 time series") +
  theme_minimal()

Add quality flag:

ts_quality_flag <- ts_quality %>%
  rowwise() %>%
  mutate(
    #  If 2 consecutive months of the period March-October are missing
    # quality_flag = 0
    quality_flag = {
      months <- c_across(month3:month10)
      if (any(months[-length(months)] == 0 & months[-1] == 0)) 0 else 1
    }
  ) %>%
  ungroup()
ts_quality_flag %>% count(quality_flag)

Boxplot comparing moments for different indices

GAM_data %>% 
  select(PlotObservationID, index, sos_slope, sos_threshold, pos, eos_slope,
         eos_threshold) %>% distinct() %>%
  pivot_longer(cols = c(sos_slope, sos_threshold, pos, eos_slope, eos_threshold),
               names_to = "moment", values_to = "value") %>%
  ggplot(aes(x = moment, y = value, fill = index)) + geom_boxplot() +
  theme_minimal()

Plot fit and moments for each PlotObservationID

Quality = 1

# Get unique IDs with quality_flag == 1
ids_q1 <- ts_quality_flag %>%
  dplyr::filter(quality_flag == 1) %>%
  mutate(PlotObservationID = droplevels(PlotObservationID)) %>%
  pull(PlotObservationID)
GAM_data_ids_q1 <- GAM_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  # Join to get original values of indices
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, DOY, NDVI, EVI, SAVI) %>%
              pivot_longer(cols = c(NDVI, EVI, SAVI), names_to = "index", 
                           values_to = "value_orig")) %>%
  # Join to get ts_quality data
  left_join(ts_quality_flag %>% select(PlotObservationID, quality_flag)) %>%
  # Keep only those with quality_flag == 1
  dplyr::filter(quality_flag == 1)

Save each plot to a file:

Quality = 0

# Get unique IDs with quality_flag == 0
ids_q0 <- ts_quality_flag %>%
  dplyr::filter(quality_flag == 0) %>%
  mutate(PlotObservationID = droplevels(PlotObservationID)) %>%
  pull(PlotObservationID)
GAM_data_ids_q0 <- GAM_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  # Join to get original values of indices
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, DOY, NDVI, EVI, SAVI) %>%
              pivot_longer(cols = c(NDVI, EVI, SAVI), names_to = "index", 
                           values_to = "value_orig")) %>%
  # Join to get ts_quality data
  left_join(ts_quality_flag %>%
              select(PlotObservationID, n_months, quality_flag)) %>%
  # Keep only those with quality_flag == 0
  dplyr::filter(quality_flag == 0)

Save each plot to a file:

Smooth the time series of NDMI and NDWI

Using GAM, without replacing values in DOY 1–50 and DOY 315–end with separate base values, later use only unweighted GAM.

compute_unweighted_fit <- function(
    # Data frame df with index values over time (DOY)
    df, 
    # Name of the vegetation indices columns (e.g., "NDVI", "EVI", "SAVI)
    index_cols = c("NDMI", "NDWI")
) {
  # Initialize list to store results
  fits_list <- list()
  
  # Loop over each index column
  for (index_col in index_cols) {
    df_index <- df %>%
      # Remove rows with missing index values and sort data by DOY
      filter(!is.na(.data[[index_col]])) %>% arrange(DOY)
    
    # Extract x (DOY) and y (index) vectors for modelling
    x <- df_index$DOY
    y <- df_index[[index_col]]
    
    # If there are fewer than 11 observations or all values are NA, skip
    if (length(x) < 11 || all(is.na(y))) {
      next
    }
    
    # Fit GAM (unweighted) with a thin plate spline (bs = "tp")
    # to smooth the index curve
    gam_unweighted <- mgcv::bam(y ~ s(x, bs = "tp"))
    pred <- predict(gam_unweighted, newdata = tibble(x = x))
    
    # Create tibble to store original and predicted index values
    fits_df <- tibble(
      PlotObservationID = unique(df$PlotObservationID),
      DOY = x,
      index = index_col,
      value = pred
    )
    
    fits_list[[index_col]] <- fits_df
  }
  
  if (length(fits_list) == 0) {
    return(tibble())
  }
  
  bind_rows(fits_list)
}

Apply the function:

Look:

smoothed_data

Save as an object:

Plot fit and moments for each PlotObservationID

smoothed_data_ids <- smoothed_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  mutate(PlotObservationID = as.character(PlotObservationID))

Save each plot to a file:

Get indices data (max. and min.)

Careful! These maximum and minimum values are from the smoothed time series. For NDVI / EVI / SAVI values in DOY 1–50 and DOY 315–end, remember that the GAM smoothing function replaced the original values with the mean base value of observations during each of these respective periods. This was so far not done for NDMI and NDWI.

final_indices_data <- GAM_data %>%
  group_by(PlotObservationID, index) %>%
  summarise(max = max(value), min = min(value)) %>%
  ungroup() %>%
  pivot_wider(names_from = index, values_from = c(max, min),
              names_glue = "{index}_{.value}") %>%
  full_join(
    smoothed_data %>%
      group_by(PlotObservationID, index) %>%
      summarise(max = max(value), min = min(value)) %>%
      ungroup() %>%
      pivot_wider(names_from = index, values_from = c(max, min),
                  names_glue = "{index}_{.value}")
    )

Get phenology data

Use GAM iter_3 to get dates of the moments, values at those moments and AUC (time-integrated indices) between SOS and EOS:

# Join to get values at SOS, POS, EOS and auc
final_phenology_data <- GAM_data %>%
  mutate(
    stage = case_when(
      DOY == sos_slope ~ "sos_slope",
      DOY == sos_threshold ~ "sos_threshold",
      DOY == pos ~ "pos",
      DOY == eos_slope ~ "eos_slope",
      DOY == eos_threshold ~ "eos_threshold",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::filter(!is.na(stage)) %>%
  select(PlotObservationID, index, stage, doy = DOY, value) %>%
  pivot_wider(
    names_from = c(index, stage),
    values_from = c(doy, value),
    names_glue = "{index}_{stage}_{.value}"
  ) %>%
  # Convert list cols to regular numeric cols
  mutate(
    NDVI_sos_slope_value = map_dbl(NDVI_sos_slope_value, 1),
    NDVI_sos_threshold_value = map_dbl(NDVI_sos_threshold_value, 1),
    NDVI_pos_value = map_dbl(NDVI_pos_value, 1),
    NDVI_eos_slope_value = map_dbl(NDVI_eos_slope_value, 1),
    NDVI_eos_threshold_value = map_dbl(NDVI_eos_threshold_value, 1),
    EVI_sos_slope_value = map_dbl(EVI_sos_slope_value, 1),
    EVI_sos_threshold_value = map_dbl(EVI_sos_threshold_value, 1),
    EVI_pos_value = map_dbl(EVI_pos_value, 1),
    EVI_eos_slope_value = map_dbl(EVI_eos_slope_value, 1),
    EVI_eos_threshold_value = map_dbl(EVI_eos_threshold_value, 1),
    SAVI_sos_slope_value = map_dbl(SAVI_sos_slope_value, 1),
    SAVI_sos_threshold_value = map_dbl(SAVI_sos_threshold_value, 1),
    SAVI_pos_value = map_dbl(SAVI_pos_value, 1),
    SAVI_eos_slope_value = map_dbl(SAVI_eos_slope_value, 1),
    SAVI_eos_threshold_value = map_dbl(SAVI_eos_threshold_value, 1)
  ) %>%
  full_join(GAM_data %>%
              distinct(PlotObservationID, index, auc_slope, auc_threshold) %>%
              pivot_wider(names_from = index, values_from = c(auc_slope, auc_threshold),
                          names_glue = "{index}_{.value}"))

Join indices and phenology data

final_RS_data <- full_join(
  # Indices data (max and min)
  final_indices_data,
  # Average values of indices per month
  monthly_avg_indices %>%
    pivot_wider(names_from = index, values_from = c(avg_value_01:auc_mar_oct),
                names_glue = "{index}_{.value}")
  ) %>%
  full_join(
    # Phenology data
    final_phenology_data 
    ) %>%
  # Sort cols in alphabetical order
  select(PlotObservationID, sort(names(.)[names(.) != "PlotObservationID"]))

Add EUNIS codes

final_RS_data <- final_RS_data %>% left_join(db_Europa_allobs)
data_RS_S2_bands_indices <- data_RS_S2_bands_indices %>%
  left_join(db_Europa_allobs)

Monthly spectrophenology per habitat type

# Prepare the data
data_monthly_EUNISa_1 <- data_RS_S2_bands_indices %>%
  mutate(month = month(date, label = TRUE, abbr = TRUE, locale="EN-us")) %>%
  group_by(month, EUNISa_1, EUNISa_1_descr) %>%
  summarise(
    mean_NDVI = mean(NDVI, na.rm = TRUE),
    sd_NDVI = sd(NDVI, na.rm = TRUE),
    n_NDVI = sum(!is.na(NDVI)),
    mean_EVI = mean(EVI, na.rm = TRUE),
    sd_EVI = sd(EVI, na.rm = TRUE),
    n_EVI = sum(!is.na(EVI)),
    mean_SAVI = mean(SAVI, na.rm = TRUE),
    sd_SAVI = sd(SAVI, na.rm = TRUE),
    n_SAVI = sum(!is.na(SAVI)),
    .groups = "drop"
  )

data_monthly_EUNISa_1 <- data_monthly_EUNISa_1 %>%
  # Add label with n
  left_join(data_monthly_EUNISa_1 %>%
              group_by(EUNISa_1) %>%
              summarise(n_total = sum(n_NDVI, na.rm = TRUE)), 
            by = "EUNISa_1") %>%
  mutate(EUNISa_1_label = paste0(EUNISa_1, " (n = ", n_total, ")"))

data_monthly_EUNISa_2 <- data_RS_S2_bands_indices %>%
  mutate(month = month(date, label = TRUE, abbr = TRUE, locale="EN-us")) %>%
  group_by(month, EUNISa_1, EUNISa_1_descr, EUNISa_2, EUNISa_2_descr) %>%
  summarise(
    mean_NDVI = mean(NDVI, na.rm = TRUE),
    sd_NDVI = sd(NDVI, na.rm = TRUE),
    n_NDVI = sum(!is.na(NDVI)),
    mean_EVI = mean(EVI, na.rm = TRUE),
    sd_EVI = sd(EVI, na.rm = TRUE),
    n_EVI = sum(!is.na(EVI)),
    mean_SAVI = mean(SAVI, na.rm = TRUE),
    sd_SAVI = sd(SAVI, na.rm = TRUE),
    n_SAVI = sum(!is.na(SAVI)),
    .groups = "drop"
  )

data_monthly_EUNISa_2 <- data_monthly_EUNISa_2 %>%
  # Add label with n
  left_join(data_monthly_EUNISa_2 %>%
              group_by(EUNISa_2) %>%
              summarise(n_total = sum(n_NDVI, na.rm = TRUE)), 
            by = "EUNISa_2") %>%
  mutate(EUNISa_2_label = paste0(EUNISa_2, " (n = ", n_total, ")"))

EUNIS level 1

# Plots

# EUNISa_1
ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line(aes(group = EUNISa_1)) +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
  #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS1_NDVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)


ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS1_EVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)


ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS1_SAVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

EUNIS level 2

Q

# NDVI
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_Q_NDVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)


# EVI
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_Q_EVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)


# SAVI
ggplot(data_monthly_EUNISa_2 %>%
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_Q_SAVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

R

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_R_NDVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_R_EVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_R_SAVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

S

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_S_NDVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_S_EVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_S_SAVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)

T

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_T_NDVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_T_EVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)


ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology", "EUNIS2_T_SAVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

Calculate other phenological metrics

final_RS_data <- final_RS_data %>%
  mutate(
    # with slope method
    # Growing season duration
    NDVI_slope_gsd = NDVI_eos_slope_doy - NDVI_sos_slope_doy,
    EVI_slope_gsd = NDVI_eos_slope_doy - NDVI_sos_slope_doy,
    SAVI_slope_gsd = SAVI_eos_slope_doy - SAVI_sos_slope_doy,
    # Difference in value between pos and sos
    NDVI_diff_pos_sos_slope_value = NDVI_pos_value - NDVI_sos_slope_value,
    EVI_diff_pos_sos_slope_value = EVI_pos_value - EVI_sos_slope_value,
    SAVI_diff_pos_sos_slope_value = SAVI_pos_value - SAVI_sos_slope_value,
    # Difference in value between pos and eos
    NDVI_diff_pos_eos_slope_value = NDVI_pos_value - NDVI_eos_slope_value,
    EVI_diff_pos_eos_slope_value = EVI_pos_value - EVI_eos_slope_value,
    SAVI_diff_pos_eos_slope_value = SAVI_pos_value - SAVI_eos_slope_value,
    # Difference in doy between pos and sos
    NDVI_diff_pos_sos_slope_doy = NDVI_pos_doy - NDVI_sos_slope_doy,
    EVI_diff_pos_sos_slope_doy = EVI_pos_doy - EVI_sos_slope_doy,
    SAVI_diff_pos_sos_slope_doy = SAVI_pos_doy - SAVI_sos_slope_doy,
    # Absolute difference in doy between pos and eos
    NDVI_diff_pos_eos_slope_doy = abs(NDVI_pos_doy - NDVI_eos_slope_doy),
    EVI_diff_pos_eos_slope_doy = abs(EVI_pos_doy - EVI_eos_slope_doy),
    SAVI_diff_pos_eos_slope_doy = abs(SAVI_pos_doy - SAVI_eos_slope_doy),
    # With threshold method
    # Growing season duration
    NDVI_threshold_gsd = NDVI_eos_threshold_doy - NDVI_sos_threshold_doy,
    EVI_threshold_gsd = NDVI_eos_threshold_doy - NDVI_sos_threshold_doy,
    SAVI_threshold_gsd = SAVI_eos_threshold_doy - SAVI_sos_threshold_doy,
    # Difference in value between pos and sos
    NDVI_diff_pos_sos_threshold_value =
      NDVI_pos_value - NDVI_sos_threshold_value,
    EVI_diff_pos_sos_threshold_value = 
      EVI_pos_value - EVI_sos_threshold_value,
    SAVI_diff_pos_sos_threshold_value = 
      SAVI_pos_value - SAVI_sos_threshold_value,
    # Difference in value between pos and eos
    NDVI_diff_pos_eos_threshold_value =
      NDVI_pos_value - NDVI_eos_threshold_value,
    EVI_diff_pos_eos_threshold_value = 
      EVI_pos_value - EVI_eos_threshold_value,
    SAVI_diff_pos_eos_threshold_value = 
      SAVI_pos_value - SAVI_eos_threshold_value,
    # Difference in doy between pos and sos
    NDVI_diff_pos_sos_threshold_doy = NDVI_pos_doy - NDVI_sos_threshold_doy,
    EVI_diff_pos_sos_threshold_doy = EVI_pos_doy - EVI_sos_threshold_doy,
    SAVI_diff_pos_sos_threshold_doy = SAVI_pos_doy - SAVI_sos_threshold_doy,
    # Absolute difference in doy between pos and eos
    NDVI_diff_pos_eos_threshold_doy = 
      abs(NDVI_pos_doy - NDVI_eos_threshold_doy),
    EVI_diff_pos_eos_threshold_doy = 
      abs(EVI_pos_doy - EVI_eos_threshold_doy),
    SAVI_diff_pos_eos_threshold_doy = 
      abs(SAVI_pos_doy - SAVI_eos_threshold_doy),
    # With months method
    # Difference in value between pos and march
    NDVI_diff_pos_march_value = NDVI_pos_value - NDVI_avg_value_03,
    EVI_diff_pos_march_value = EVI_pos_value - EVI_avg_value_03,
    SAVI_diff_pos_march_value = SAVI_pos_value - SAVI_avg_value_03,
    # Difference in value between pos and oct
    NDVI_diff_pos_oct_value = NDVI_pos_value - NDVI_avg_value_10,
    EVI_diff_pos_oct_value = EVI_pos_value - EVI_avg_value_10,
    SAVI_diff_pos_oct_value = SAVI_pos_value - SAVI_avg_value_10,
    # Difference in doy between pos and march
    NDVI_diff_pos_march_doy = NDVI_pos_doy - 75, 
    EVI_diff_pos_march_doy = EVI_pos_doy - 75,
    SAVI_diff_pos_march_doy = SAVI_pos_doy - 75,
    # Difference in doy between pos and oct
    NDVI_diff_pos_oct_doy = abs(NDVI_pos_doy - 285),
    EVI_diff_pos_oct_doy = abs(EVI_pos_doy - 285),
    SAVI_diff_pos_oct_doy = abs(SAVI_pos_doy - 285)
  )

Checks

Slope method

# Growing season duration should be positive
nrow(final_RS_data %>% 
       dplyr::filter(NDVI_slope_gsd <= 0 | EVI_slope_gsd <= 0 | 
                       SAVI_slope_gsd <= 0))
[1] 0
# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_slope_value <= 0))
[1] 108
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_slope_value <= 0))
[1] 91
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_sos_slope_value <= 0))
[1] 47
# Difference in value between pos and eos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_slope_value <= 0))
[1] 340
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_eos_slope_value <= 0))
[1] 230
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_eos_slope_value <= 0))
[1] 78
# Difference in doy between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_slope_doy <= 0 |
                       EVI_diff_pos_sos_slope_doy <= 0 |
                       SAVI_diff_pos_sos_slope_doy <= 0))
[1] 0
# Difference in doy between eos and pos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_slope_doy <= 0 |
                       EVI_diff_pos_eos_slope_doy <= 0 |
                       SAVI_diff_pos_eos_slope_doy <= 0))
[1] 0

Threshold method

# Growing season duration should be positive
nrow(final_RS_data %>% 
       dplyr::filter(NDVI_threshold_gsd <= 0 | EVI_threshold_gsd <= 0 | 
                       SAVI_threshold_gsd <= 0))
[1] 0
# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_threshold_value <= 0))
[1] 191
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_threshold_value <= 0))
[1] 188
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_sos_threshold_value <= 0))
[1] 78
# Difference in value between pos and eos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_threshold_value <= 0))
[1] 218
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_eos_threshold_value <= 0))
[1] 288
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_eos_threshold_value <= 0))
[1] 79
# Difference in doy between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_threshold_doy <= 0 |
                       EVI_diff_pos_sos_threshold_doy <= 0 |
                       SAVI_diff_pos_sos_threshold_doy <= 0))
[1] 31
# Difference in doy between eos and pos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_threshold_doy <= 0 |
                       EVI_diff_pos_eos_threshold_doy <= 0 |
                       SAVI_diff_pos_eos_threshold_doy <= 0))
[1] 0

HERE: Run. Detect number of peaks in smoothed curves

# Set up parallel plan
plan(multisession, workers = min(parallel::detectCores() - 1))

# Define peak-counting function
count_peaks <- function(df) {
  # Convert 1D array column to numeric vector
  y <- as.numeric(df$value)
  
  peaks <- findpeaks(y, minpeakdistance = 30, threshold = 0.02)
  
  tibble(
    PlotObservationID = unique(df$PlotObservationID),
    index = unique(df$index),
    num_peaks = if (!is.null(peaks)) nrow(peaks) else 0
  )
}

# Apply peak counting in parallel
peak_counts <- GAM_data %>%
  filter(fit_type == "iter_3") %>%
  arrange(DOY) %>%
  group_by(PlotObservationID, index) %>%
  nest() %>%
  mutate(result = future_map(data, count_peaks, .progress = TRUE,
                             .options = furrr_options(scheduling = Inf))) %>%
  select(-data) %>%
  unnest(result)

# Summarize result
peak_counts_summary <- peak_counts %>% count(index, num_peaks)

HERE: save

# # Function to count peaks for each PlotObservationID
# count_peaks <- function(df) {
#   y <- df$value
#   peaks <- findpeaks(y, 
#                      # Minimum number of indices (e.g., DOY steps)
#                      # between two peaks
#                      minpeakdistance = 30, 
#                      # Minimum vertical difference between a peak
#                      # and its surrounding value
#                      threshold = 0.02)
#   num_peaks <- if (!is.null(peaks)) nrow(peaks) else 0
#   return(tibble(PlotObservationID = unique(df$PlotObservationID),
#                 num_peaks = num_peaks))
# }
# 
# # Apply to each group
# peak_counts <- GAM_data %>%
#   mutate(value = map_dbl(value, 1)) %>%
#   dplyr::filter(fit_type == "iter_3") %>%
#   arrange(DOY) %>%
#   group_by(PlotObservationID, index) %>%
#   group_modify(~ count_peaks(.x)) %>%
#   ungroup()
# 
# # View result
# peak_counts %>% count(index, num_peaks)

Plot number of peaks

peak_counts %>% count(index, num_peaks) %>%
  ggplot(aes(x = index, y = n, fill = factor(num_peaks))) +
  geom_bar(stat = "identity", position = position_dodge())

EVI gives less problems, maybe use only this one?

Add number of peaks to data

final_RS_data <- final_RS_data %>%
  left_join(peak_counts %>%
              pivot_wider(names_from = index, values_from = num_peaks,
                          names_glue = "{index}_{.value}"))

Plot fit and moments for PlotObservationIDs with zero peaks

Further checks (EVI)

Slope method

# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_slope_value <= 0))
[1] 91
final_RS_data %>%
  dplyr::filter(EVI_diff_pos_sos_slope_value <= 0) %>%
  count(EVI_num_peaks)
Error in `count()`:
! Must group by variables found in `.data`.
✖ Column `EVI_num_peaks` is not found.
Run `]8;;x-r-run:rlang::last_trace()rlang::last_trace()]8;;` to see where the error occurred.

Threshold method

Add some columns needed

final_RS_data <- final_RS_data %>%
  left_join(
    data_RS_S2_bands_indices %>%
      distinct(PlotObservationID, year, biogeo, unit, Lctnmth)
    )

Add canopy height data

Read the data:

data_RS_CH <- read_csv(
  "C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv")
db_Europa <- read_csv(
  here("..", "DB_first_check", "data", "clean","db_Europa_20250107.csv")
  )
data_RS_CH_ID <- db_Europa %>%
  select(PlotObservationID, obs_unique_id) %>%
  right_join(data_RS_CH %>%
              # Rename to be able to join on this column
              rename(obs_unique_id = obs_unique)) %>%
  select(PlotObservationID, canopy_height)

Join:

final_RS_data <- final_RS_data %>%
  left_join(data_RS_CH_ID %>%
              mutate(PlotObservationID = factor(PlotObservationID)))

Save to clean data

write_tsv(final_RS_data,
          here("data", "clean","final_RS_data_bands_S2_all_20250804.csv"))

Session info

sessionInfo()
R version 4.5.1 (2025-06-13 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)

Matrix products: default
  LAPACK version 3.12.1

locale:
[1] LC_COLLATE=Spanish_Spain.utf8  LC_CTYPE=Spanish_Spain.utf8    LC_MONETARY=Spanish_Spain.utf8
[4] LC_NUMERIC=C                   LC_TIME=Spanish_Spain.utf8    

time zone: Europe/Madrid
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] beepr_2.0        pracma_2.4.4     progressr_0.15.1 furrr_0.3.1      future_1.67.0   
 [6] mgcv_1.9-3       nlme_3.1-168     knitr_1.50       sf_1.0-21        dtplyr_1.3.2    
[11] here_1.0.2       lubridate_1.9.4  forcats_1.0.0    stringr_1.5.2    dplyr_1.1.4     
[16] purrr_1.1.0      readr_2.1.5      tidyr_1.3.1      tibble_3.3.0     ggplot2_4.0.0   
[21] tidyverse_2.0.0  signal_1.8-1    

loaded via a namespace (and not attached):
 [1] gtable_0.3.6       xfun_0.53          lattice_0.22-7     tzdb_0.5.0        
 [5] vctrs_0.6.5        tools_4.5.1        generics_0.1.4     parallel_4.5.1    
 [9] proxy_0.4-27       pkgconfig_2.0.3    Matrix_1.7-4       KernSmooth_2.23-26
[13] data.table_1.17.8  RColorBrewer_1.1-3 S7_0.2.0           lifecycle_1.0.4   
[17] compiler_4.5.1     farver_2.1.2       textshaping_1.0.3  codetools_0.2-20  
[21] htmltools_0.5.8.1  class_7.3-23       yaml_2.3.10        crayon_1.5.3      
[25] pillar_1.11.0      MASS_7.3-65        classInt_0.4-11    parallelly_1.45.1 
[29] tidyselect_1.2.1   digest_0.6.37      stringi_1.8.7      listenv_0.9.1     
[33] labeling_0.4.3     splines_4.5.1      rprojroot_2.1.1    fastmap_1.2.0     
[37] grid_4.5.1         cli_3.6.5          magrittr_2.0.3     utf8_1.2.6        
[41] e1071_1.7-16       withr_3.0.2        scales_1.4.0       bit64_4.6.0-1     
[45] timechange_0.3.0   rmarkdown_2.29     audio_0.1-11       globals_0.18.0    
[49] bit_4.6.0          ragg_1.5.0         hms_1.1.3          evaluate_1.0.5    
[53] rlang_1.1.6        Rcpp_1.1.0         glue_1.8.0         DBI_1.2.3         
[57] vroom_1.6.5        rstudioapi_0.17.1  R6_2.6.1           systemfonts_1.2.3 
[61] units_0.8-7       
---
title: "Script to work with S2 bands derived from GEE"
subtitle: "Read and manipulation data, calculate indices"
author: "Alicia Valdés"
date: "`r format(Sys.time(), '%d %B %Y')`"
output:
  pdf_document: default
  html_notebook: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(warning = FALSE)
```

# Load libraries

```{r}
library(signal)
library(tidyverse)
library(here)
library(lubridate)
library(dtplyr)
library(sf)
library(knitr)
library(mgcv)
library(future)
library(furrr)
library(progressr)
library(pracma)
```

# Set a simple console progress bar

```{r}
handlers("txtprogressbar") # Simple console progress bar
```

# Supress package messages

```{r}
suppressPackageStartupMessages({
  library(mgcv)
  library(nlme)
})
```

# Load previously created objects

```{r}
load(file = "objects/data_RS_S2_bands_indices.Rdata")
load(file = "objects/GAM_data_S2.Rdata")
load(file = "objects/smoothed_data_S2.Rdata")
load(file = "objects/monthly_avg_indices_S2.Rdata")
```

# Load Resurvey db

```{r}
db_Europa_allobs <- read_csv(
  here("data", "clean", "db_Europa_allobs.csv")) %>%
  select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
         EUNISa_2, EUNISa_2_descr) %>%
  mutate(PlotObservationID = factor(PlotObservationID),
         EUNISa_1 = factor(EUNISa_1), EUNISa_2 = factor(EUNISa_2))
```

# Define printall function

```{r}
printall <- function(tibble) {
  print(tibble, width = Inf)
  }
```

# Read files with band data

I got these files using the GEE code prepared by Bea.

These files contain all observations in the ReSurvey database from 2016 onward. In order to avoid computation problems in GEE, biogeographical units that contain more than 4500 points have been subdivided in ArcGIS.

```{r}
# Set the folder path
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2/Bands/all"

# List CSV files
csv_files <- list.files(folder_path, full.names = TRUE, recursive = TRUE)

# Function to extract biogeo and unit from the filename
extract_info <- function(filename) {
  first_word <- strsplit(filename, "_")[[1]][1]
  biogeo <- str_extract(first_word,
                        "^(ALP|ANA|ARC|ATL|BLACKSEA|BOR|CON|MACARONESIA|MED|PANONIA|STEPPIC)")
  unit <- str_remove(first_word, biogeo)
  if (is.na(unit) || unit == "") unit <- NA_character_
  list(biogeo = biogeo, unit = unit)
  }


# Define column types: force RSrvypl to character, others auto-detected
custom_col_types <- cols(
  RSrvypl = col_character(),
  RSrvyst = col_character(),
  default = col_guess()
)

# Read and process each file
data_list <- lapply(csv_files, function(file) {
  info <- extract_info(basename(file)) # Use only the filename
  
  # Read the file
  df <- read_csv(file, col_types = custom_col_types) %>%
    # Remove columns that give column type problems when combining data
    select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
    mutate(biogeo = info$biogeo, unit = info$unit)
  
  return(df)
  })

# Combine all data
data_RS_S2_bands <- bind_rows(data_list) %>%
  rename(PlotObservationID = PltObID)

# View the resulting tibble
print(data_RS_S2_bands)

# Counts per biogeo and unit
print(data_RS_S2_bands %>% count(biogeo, unit), n = 100)
```

# Some checks

Check that the year in the date of the images is not different to the sampling year:

```{r}
data_RS_S2_bands %>% dplyr::filter(year != year(date))
```

Check how many different images are for each observation, date and time:

```{r}
data_RS_S2_bands %>% group_by(PlotObservationID, date, time_utc) %>%
  summarise(n_images = n_distinct(image_id), .groups = "drop") %>%
  count(n_images)
```

# Average the bands

When there is more than one image for each point and day, average the values of the bands:

```{r eval=FALSE, include=FALSE}
# Summarize the band values conditionally
band_summary <- data_RS_S2_bands %>%
  group_by(PlotObservationID, date) %>%
  summarise(
    n_images = n_distinct(image_id),
    B11 = if (n_images > 1) mean(B11, na.rm = TRUE) else first(B11),
    B2  = if (n_images > 1) mean(B2,  na.rm = TRUE) else first(B2),
    B3  = if (n_images > 1) mean(B3,  na.rm = TRUE) else first(B3),
    B4  = if (n_images > 1) mean(B4,  na.rm = TRUE) else first(B4),
    B8  = if (n_images > 1) mean(B8,  na.rm = TRUE) else first(B8),
    .groups = "drop"
  ) 

# Calculate how many different days for each PlotObservationID
n_days <- band_summary %>%
  group_by(PlotObservationID) %>%
  summarise(n_days = n_distinct(date))

# Join back to original data
data_RS_S2_bands_updated <- data_RS_S2_bands %>%
  # Remove old band values
  select(-B11, -B2, -B3, -B4, -B8) %>%
  # Join band_summary
  left_join(band_summary, by = c("PlotObservationID", "date")) %>%
  # Keep one row per group
  distinct(PlotObservationID, date, .keep_all = TRUE) %>%
  # Remove unwanted columns
  select(-`system:index`, -image_id, -.geo, -time_utc, -timestamp) %>%
  # Join
  left_join(n_days)
```

# Calculate indices

```{r eval=FALSE, include=FALSE}
# Calculate indices
data_RS_S2_bands_indices <- data_RS_S2_bands_updated %>%
  # Set PlotObservationID as factor
  mutate(PlotObservationID = factor(PlotObservationID)) %>%
  # Rename the bands
  rename(blue = B2, green = B3, red = B4, NIR = B8, SWIR = B11) %>%
  # Scale the bands
  mutate(blue = blue / 10000, green = green / 10000, red = red / 10000,
         NIR = NIR / 10000, SWIR = SWIR / 10000) %>%
  # Create column that combines the day of the month and the time
  mutate(
    date = as.POSIXct(date),
    # Normalize the dates to a fixed year (2000)
    # so that seasonal patterns across different years can be compared visually
    day_month = as.POSIXct(format(date, "2000-%m-%d"))) %>%
  # Create column with DOY
  mutate(DOY = yday(date)) %>%
  # Calculate NDVI
  mutate(NDVI = (NIR - red) / (NIR + red),
         EVI = (NIR - red) * 2.5 / (NIR + 6 * red - 7.5 * blue + 1),
         SAVI = (NIR - red) * 1.5 / (NIR + red + 0.5),
         NDMI = (NIR - SWIR) / (NIR + SWIR),
         NDWI = (green - NIR) / (green + NIR)) %>%
  # Setting values of indices outside expected ranges (errors) to NA
  mutate(EVI = if_else(EVI > 1 | EVI < -1, NA, EVI)) # 12166 values of EVI as NA
```

Save:

```{r eval=FALSE, include=FALSE}
save(data_RS_S2_bands_indices, file = "objects/data_RS_S2_bands_indices.Rdata")
```

Plot n_daytime:

```{r}
data_RS_S2_bands_indices %>%
  group_by(PlotObservationID) %>%
  summarise(n_days = first(n_days)) %>% ungroup() %>%
  ggplot(aes(x = n_days)) + geom_histogram(color = "black", fill = "white") +
  theme_minimal()
```

# Compute phenological metrics from models fitted to time series data

## Function

## HERE (already done in Landsat): Modify function and rerun everything from here (TBD)

```{r eval=FALSE, include=FALSE}
# Modify these parts in the function below to ensure that
# sos_threshold < pos and eos_threshold > pos
sos_threshold <- if (!is.na(u_sos) && !is.na(pos)) {
  candidates <- x[which(pred >= u_sos & x <= pos)]
  if (length(candidates) > 0) candidates[1] else NA_real_
} else NA_real_

eos_threshold <- if (!is.na(u_eos)) {
  x[rev(which(x > pos & pred >= u_eos))[1]]
} else NA_real_
```

Using GAMs, reweighting and 3 iterations.

Using both a change detection method (maximum slope) and a threshold method (50% amplitude) to calculate sos and eos.

Approach similar to https://doi.org/10.1016/j.jag.2020.102172 for GAM fitting and change detection method, and to https://www.mdpi.com/2072-4292/12/22/3738 fot threshold method.

Define function to compute phenology metrics using GAM fit and NDVI / EVI / SAVI:

```{r}
compute_metrics_models <- function(df, index_cols = c("NDVI", "EVI", "SAVI")) {
  suppressPackageStartupMessages({
    library(mgcv)
    library(nlme)
    })
  
  plan(multisession)  # Set up parallel processing
  
  # Create a list of index-specific data frames
  index_dfs <- lapply(index_cols, function(index_col) {
    list(index_col = index_col, df = df %>%
           select(DOY, PlotObservationID, all_of(index_col)))
    })
  
  # Define the processing function for each index
  process_index <- function(index_data) {
    index_col <- index_data$index_col
    df_index <- index_data$df %>%
      filter(!is.na(.data[[index_col]])) %>%
      arrange(DOY)
    
    plot_id <- unique(df_index$PlotObservationID)

    if (nrow(df_index) < 10) {
      message("  Skipped: insufficient data (< 10 rows)")
      return(tibble(PlotObservationID = plot_id, index = index_col,
                    sos_slope = NA_real_, sos_threshold = NA_real_,
                    pos = NA_real_, eos_slope = NA_real_, 
                    eos_threshold = NA_real_, auc_slope = NA_real_,
                    auc_threshold = NA_real_, Vmax = NA_real_,
                    DOY = df_index$DOY, value = NA_real_))
    }
    
    # Replace early/late DOY values
    base_value_early <- mean(df_index %>% filter(DOY <= 50) %>% 
                               pull(index_col), na.rm = TRUE)
    base_value_late  <- mean(df_index %>% filter(DOY >= 315) %>% 
                               pull(index_col), na.rm = TRUE)

    df_index <- df_index %>%
      mutate(!!index_col := case_when(
        DOY <= 50 ~ base_value_early,
        DOY >= 315 ~ base_value_late,
        TRUE ~ .data[[index_col]]
      ))

    x <- df_index$DOY
    y <- df_index[[index_col]]
    weights <- rep(1, length(y))
    
    # GAM fit
    pred <- NULL
    for (i in 1:3) {
      gam_fit <- tryCatch({
        mgcv::bam(y ~ s(x, bs = "tp"),weights = weights)
        }, error = function(e) {
          message("  GAM fitting failed for ", plot_id, " - ", index_col, ": ", 
                  e$message)
          return(NULL)
          })
      if (is.null(gam_fit)) {
        return(tibble(
          PlotObservationID = plot_id,
          index = index_col,
          sos_slope = NA_real_,
          sos_threshold = NA_real_,
          pos = NA_real_,
          eos_slope = NA_real_, 
          eos_threshold = NA_real_, 
          auc_slope = NA_real_,
          auc_threshold = NA_real_, 
          Vmin_pre = NA_real_, 
          Vmin_post = NA_real_,
          Vmax = NA_real_, 
          u_sos = NA_real_, 
          u_eos = NA_real_,
          DOY = df_index$DOY,
          value = NA_real_))
        }
      
      pred <- tryCatch({
        predict(gam_fit, newdata = tibble(x = x))
        }, error = function(e) {
          message("Prediction failed for ", plot_id, " - ", index_col, ": ",
                  e$message)
          return(rep(NA_real_, length(x)))
          })
      
      idx_between <- which(x > 50 & x < 315 & !is.na(pred) & pred != 0)
      weights <- rep(1, length(y))
      weights[idx_between] <- (y[idx_between] / (pred[idx_between] + 1e-6))^4
      weights[weights > 1 | is.na(weights)] <- 1
      }
    
    # Compute metrics
    slope <- c(NA, diff(pred))
    idx <- which(x >= 50 & x <= 315)
    pos <- if (length(idx) > 0) x[idx][which.max(pred[idx])] else NA_real_

    sos_slope <- if (!is.na(pos)) {
      idx <- which(x < pos)
      if (length(idx) > 0) x[idx][which.max(slope[idx])] else NA_real_
    } else NA_real_

    eos_slope <- if (!is.na(pos)) {
      idx <- which(x > pos)
      if (length(idx) > 0) x[idx][which.min(slope[idx])] else NA_real_
    } else NA_real_

    integration_idx_slope <- which(x >= sos_slope & x <= 
                                     eos_slope & !is.na(pred))
    auc_slope <- if (length(integration_idx_slope) > 1) {
      sum(diff(x[integration_idx_slope]) * 
            zoo::rollmean(pred[integration_idx_slope], 2))
      } else NA_real_
    
    # Vmin antes y después del pico
    Vmin_pre <- if (!is.na(pos)) min(pred[x <= pos], na.rm = TRUE)else NA_real_
    Vmin_post <- if (!is.na(pos)) min(pred[x >= pos], na.rm = TRUE) else NA_real_
    Vmax <- max(pred, na.rm = TRUE)
    
    # Umbrales relativos
    p <- 0.5
    u_sos <- if (!is.na(Vmin_pre)) Vmin_pre + p * (Vmax - Vmin_pre) else NA_real_
    u_eos <- if (!is.na(Vmin_post)) Vmin_post + p * (Vmax - Vmin_post) else NA_real_
    
    # DOY donde se cruzan los umbrales
    sos_threshold <- if (!is.na(u_sos)) x[which(pred >= u_sos)[1]] else NA_real_
    eos_threshold <- if (!is.na(u_eos)) x[rev(which(pred >= u_eos))[1]] else NA_real_
    
    integration_idx_threshold <- which(x >= sos_threshold & 
                                         x <= eos_threshold & !is.na(pred))
    auc_threshold <- if (length(integration_idx_threshold) > 1) {
      sum(diff(x[integration_idx_threshold]) * 
            zoo::rollmean(pred[integration_idx_threshold], 2))
      } else NA_real_
    
    # 1. Predicciones por DOY
    fits_df <- tibble(
      PlotObservationID = unique(df_index$PlotObservationID),
      DOY = x,
      value = pred,
      index = index_col
      )
    
    # 2. Métricas resumen
    metrics_df <- tibble(
      PlotObservationID = unique(df_index$PlotObservationID),
      index = index_col,
      sos_slope = sos_slope,
      sos_threshold = sos_threshold,
      pos = pos,
      eos_slope = eos_slope,
      eos_threshold = eos_threshold,
      auc_slope = auc_slope,
      auc_threshold = auc_threshold,
      Vmin_pre = Vmin_pre,
      Vmin_post = Vmin_post,
      Vmax = Vmax,
      u_sos = u_sos,
      u_eos = u_eos
      )
    
    # 3. Unir por PlotObservationID, index
    final_df <- left_join(fits_df, metrics_df, 
                          by = c("PlotObservationID", "index"))
  }
  
  # Run in parallel
  results <- future_map(index_dfs, process_index, .progress = TRUE)
  results <- purrr::compact(results)  # removes NULLs
  if (length(results) == 0) return(tibble())  # or return(NULL)
  bind_rows(results)
}
```

## Calculation

Apply the function with batch processing

```{r eval=FALSE, message=FALSE, include=FALSE}
plan(multisession, workers = availableCores() - 1)

ids <- unique(data_RS_S2_bands_indices$PlotObservationID)
batches <- split(ids, ceiling(seq_along(ids) / 50))  # batches of 50

start_total <- Sys.time()

GAM_data <- map_dfr(seq_along(batches), function(i) {
  batch_ids <- batches[[i]]
  total_batches <- length(batches)
  batch_file <- file.path("objects/GAM_batches_S2", paste0("batch_", i, ".rds"))

  if (file.exists(batch_file)) {
    message("✅ Batch  ", i, " of ", total_batches, 
            " already processed. Loading from file.")
    return(readRDS(batch_file))
  }

  message("🔄 Processing batch  ", i, " of ", total_batches, " with ",
          length(batch_ids), " IDs...")

  start_batch <- Sys.time()

  result <- data_RS_S2_bands_indices %>%
    filter(PlotObservationID %in% batch_ids) %>%
    group_split(PlotObservationID) %>%
    set_names(map_chr(., ~ as.character(unique(.x$PlotObservationID)))) %>%
    future_map_dfr(~ compute_metrics_models(df = .,
                                            index_cols = c("NDVI", "EVI", "SAVI")),
                   .progress = TRUE)

  end_batch <- Sys.time()
  duration <- round(difftime(end_batch, start_batch, units = "mins"), 2)
  message("⏱️ Batch time ", i, ": ", duration, " minutes")

  message("💾 Saving batch ", i, " to file...")
  saveRDS(result, batch_file)
  message("✅ Batch ", i, " saved.") 

  result
})

end_total <- Sys.time()
total_time <- round(difftime(end_total, start_total, units = "mins"), 2)
message("⏱️ Total time: ", total_time, " minutes")
```

```{r}
plan(sequential)
```

## Save

Look:

```{r}
GAM_data
```

Save as an object:

```{r eval=FALSE, include=FALSE}
save(GAM_data, file = "objects/GAM_data_S2.Rdata")
```

## Extract average values of indices per month

Extract average values of indices per month and AUC between March and October 

```{r}
extract_monthly_avg_indices <- function(
  GAM_data, 
  monthly_doys = list("01" = 1:31, "02" = 32:59, "03" = 60:90, "04" = 91:120, 
                      "05" = 121:151, "06" = 152:181, "07" = 182:212, 
                      "08" = 213:243, "09" = 244:273, "10" = 274:304,
                      "11" = 305:334, "12" = 335:365)) {
  
  monthly_df <- GAM_data %>%
    mutate(month = purrr::map_chr(DOY, function(doy) {
      month_name <- names(monthly_doys)[sapply(monthly_doys, 
                                               function(r) doy %in% r)]
      if (length(month_name) > 0) month_name else NA_character_
    })) %>%
    dplyr::filter(!is.na(month)) %>%
    group_by(PlotObservationID, index, month) %>%
    summarise(avg_value = mean(value, na.rm = TRUE), .groups = "drop") %>%
    mutate(avg_value = ifelse(is.infinite(avg_value), NA, avg_value)) %>%
    arrange(PlotObservationID, match(month, names(monthly_doys))) %>%
    pivot_wider(names_from = month, values_from = avg_value, 
                names_prefix = "avg_value_")
  
  # Calcular AUC entre marzo y octubre usando regla del trapecio
  months_auc <- c("03", "04", "05", "06", "07", "08", "09", "10")
  # DOY aproximado del centro de cada mes
  doy_midpoints <- c(75, 105, 135, 165, 195, 225, 255, 285)  
  
  monthly_df <- monthly_df %>%
    rowwise() %>%
    mutate(
      auc_mar_oct = {
        values <- c_across(all_of(paste0("avg_value_", months_auc)))
        if (any(is.na(values))) NA_real_ else sum(diff(doy_midpoints) *
                                                    zoo::rollmean(values, 2))
      }
    ) %>%
    ungroup()
  
  return(monthly_df)
}
```

```{r eval=FALSE, include=FALSE}
monthly_avg_indices <- extract_monthly_avg_indices(GAM_data)
```

Save as an object:

```{r eval=FALSE, include=FALSE}
save(monthly_avg_indices, file = "objects/monthly_avg_indices_S2.Rdata")
```

## Assess time series quality

For the time series to be acceptable, it should have a reasonable number of time points, and these points should be distributed along almost all months (could be ok to miss the winter months).

In GAM data, check how many time points are there for each PlotObservationID, how many months, and which months are missing.

```{r}
ts_quality <- GAM_data %>%
  # Filter only NDVI (all indices will have the same time points)
  dplyr::filter(index == "NDVI") %>%
  # Get month from DOY
  mutate(month = month(ymd("2020-01-01") + days(DOY - 1))) %>%
  # For each PlotObservationID
  group_by(PlotObservationID) %>%
  # Get the number of time points (days) and the number of months
  summarise(
    n_days = n_distinct(DOY),
    n_months = n_distinct(month),
    .groups = "drop"
  ) %>%
  left_join(GAM_data %>%
              # Filter only NDVI
              dplyr::filter(index == "NDVI") %>%
              # Get month from DOY
              mutate(month = month(ymd("2020-01-01") + days(DOY - 1))) %>%
              # Get unique values of PlotObservationID and month
              distinct(PlotObservationID, month) %>%
              # Add 1 as value
              mutate(value = 1) %>%
              # Reshape to wide format and add zeros when month is missing
              pivot_wider(
                names_from = month,
                names_prefix = "month",
                values_from = value,
                values_fill = 0),
            by = "PlotObservationID")
```

Histograms time points and n months:

```{r}
ggplot(ts_quality, aes(x = n_days)) +
  geom_histogram(color = "black", fill = "white") +
  xlab("Number of time points (days) in the S2 time series") +
  theme_minimal()
ggplot(ts_quality, aes(x = n_months)) +
  geom_histogram(color = "black", fill = "white") +
  xlab("Number of months in the S2 time series") +
  theme_minimal()
```

Count how many PlotObservationIDs have missing data (value 0) for each month:

```{r}
obs_missing_month <- ts_quality %>%
  summarise(across(starts_with("month"), ~ sum(.x == 0))) %>%
  pivot_longer(cols = everything(), names_to = "month", values_to = "nobs_missing")

ggplot(obs_missing_month %>%
         mutate(month = factor(month, levels = paste0("month", 1:12))), 
       aes(x = month, y = nobs_missing)) + geom_bar(stat = "identity") +
  ylab("Number of PlotObservationID with missing data") +
  ggtitle("Missing data in S2 time series") +
  theme_minimal()
```

Add quality flag:

```{r}
ts_quality_flag <- ts_quality %>%
  rowwise() %>%
  mutate(
    #  If 2 consecutive months of the period March-October are missing
    # quality_flag = 0
    quality_flag = {
      months <- c_across(month3:month10)
      if (any(months[-length(months)] == 0 & months[-1] == 0)) 0 else 1
    }
  ) %>%
  ungroup()
```

```{r}
ts_quality_flag %>% count(quality_flag)
```

## Boxplot comparing moments for different indices

```{r}
GAM_data %>% 
  select(PlotObservationID, index, sos_slope, sos_threshold, pos, eos_slope,
         eos_threshold) %>% distinct() %>%
  pivot_longer(cols = c(sos_slope, sos_threshold, pos, eos_slope, eos_threshold),
               names_to = "moment", values_to = "value") %>%
  ggplot(aes(x = moment, y = value, fill = index)) + geom_boxplot() +
  theme_minimal()
```

## Plot fit and moments for each PlotObservationID

### Quality = 1

```{r}
# Get unique IDs with quality_flag == 1
ids_q1 <- ts_quality_flag %>%
  dplyr::filter(quality_flag == 1) %>%
  mutate(PlotObservationID = droplevels(PlotObservationID)) %>%
  pull(PlotObservationID)
GAM_data_ids_q1 <- GAM_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  # Join to get original values of indices
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, DOY, NDVI, EVI, SAVI) %>%
              pivot_longer(cols = c(NDVI, EVI, SAVI), names_to = "index", 
                           values_to = "value_orig")) %>%
  # Join to get ts_quality data
  left_join(ts_quality_flag %>% select(PlotObservationID, quality_flag)) %>%
  # Keep only those with quality_flag == 1
  dplyr::filter(quality_flag == 1)
```

```{r eval=FALSE, include=FALSE}
# Get unique PlotObservationIDs
unique_ids1 <- ids_q1

# Create and store plots in a list
ts_plots_q1 <- map(unique_ids1, function(id) {
  plot_data <- GAM_data_ids_q1 %>%
    mutate(PlotObservationID = as.character(PlotObservationID)) %>%
    dplyr::filter(PlotObservationID == id) 
  
  # Extract metadata for title
  metadata <- plot_data %>%
    select(biogeo, unit, EUNISa_1, EUNISa_2, quality_flag) %>%
    distinct()
  
  ggplot() +
    # Raw data points
    geom_point(data = plot_data,aes(x = DOY, y = value_orig), alpha = 0.5) +
    geom_line(data = plot_data, aes(x = DOY, y = value), 
              size = 0.5, color = "blue") +
    geom_vline(data = plot_data %>% distinct(index, sos_slope),
               aes(xintercept = sos_slope, group = index),
               linetype = "dashed", size = 0.5, color = "red") +
    geom_vline(data = plot_data %>% distinct(index, sos_threshold),
               aes(xintercept = sos_threshold, group = index),
               linetype = "dashed", size = 0.5, color = "darkgreen") +
    geom_vline(data = plot_data %>% distinct(index, pos),
               aes(xintercept = pos, group = index),
               linetype = "dotted", size = 0.5, color = "blue") +
    geom_vline(data = plot_data %>% distinct(index, eos_slope),
               aes(xintercept = eos_slope, group = index),
               linetype = "dashed", size = 0.5, color = "red") +
    geom_vline(data = plot_data %>% distinct(index, eos_threshold),
               aes(xintercept = eos_threshold, group = index),
               linetype = "dashed", size = 0.5, color = "darkgreen") +
    facet_grid(cols = vars(index)) +
    labs(
      title = glue::glue("{id} | {metadata$biogeo}{metadata$unit} | {metadata$EUNISa_1} | {metadata$EUNISa_2} | Quality: {metadata$quality_flag}"),
      x = "Day of Year",
      y = "Index Value"
    ) +
    theme_minimal() + theme(legend.position = "top")
})

# Name the list by PlotObservationID
names(ts_plots_q1) <- unique_ids1

# Display the first plot
ts_plots_q1[1]
```

Save each plot to a file:

```{r eval=FALSE, include=FALSE}
walk2(ts_plots_q1, seq_along(ts_plots_q1), ~ ggsave(
  filename = paste0("C:/Analyses/MOTIVATE_validation/output/figures/phenology/ts_q1_S2/ts_plots_q1", .y, ".jpeg"),
  plot = .x,
  width = 8,
  height = 5
))
```

### Quality = 0

```{r}
# Get unique IDs with quality_flag == 0
ids_q0 <- ts_quality_flag %>%
  dplyr::filter(quality_flag == 0) %>%
  mutate(PlotObservationID = droplevels(PlotObservationID)) %>%
  pull(PlotObservationID)
GAM_data_ids_q0 <- GAM_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  # Join to get original values of indices
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, DOY, NDVI, EVI, SAVI) %>%
              pivot_longer(cols = c(NDVI, EVI, SAVI), names_to = "index", 
                           values_to = "value_orig")) %>%
  # Join to get ts_quality data
  left_join(ts_quality_flag %>%
              select(PlotObservationID, n_months, quality_flag)) %>%
  # Keep only those with quality_flag == 0
  dplyr::filter(quality_flag == 0)
```

```{r eval=FALSE, include=FALSE}
# Get unique PlotObservationIDs
unique_ids0 <- ids_q0

# Create and store plots in a list
ts_plots_q0 <- map(unique_ids0, function(id) {
  plot_data <- GAM_data_ids_q0 %>%
    mutate(PlotObservationID = as.character(PlotObservationID)) %>%
    dplyr::filter(PlotObservationID == id) 
  
  # Extract metadata for title
  metadata <- plot_data %>%
    select(biogeo, unit, EUNISa_1, EUNISa_2, quality_flag) %>%
    distinct()
  
  ggplot() +
    # Raw data points
    geom_point(data = plot_data,aes(x = DOY, y = value_orig), alpha = 0.5) +
    geom_line(data = plot_data, aes(x = DOY, y = value), 
              size = 0.5, color = "blue") +
    geom_vline(data = plot_data %>% distinct(index, sos_slope),
               aes(xintercept = sos_slope, group = index),
               linetype = "dashed", size = 0.5, color = "red") +
    geom_vline(data = plot_data %>% distinct(index, sos_threshold),
               aes(xintercept = sos_threshold, group = index),
               linetype = "dashed", size = 0.5, color = "darkgreen") +
    geom_vline(data = plot_data %>% distinct(index, pos),
               aes(xintercept = pos, group = index),
               linetype = "dotted", size = 0.5, color = "blue") +
    geom_vline(data = plot_data %>% distinct(index, eos_slope),
               aes(xintercept = eos_slope, group = index),
               linetype = "dashed", size = 0.5, color = "red") +
    geom_vline(data = plot_data %>% distinct(index, eos_threshold),
               aes(xintercept = eos_threshold, group = index),
               linetype = "dashed", size = 0.5, color = "darkgreen") +
    facet_grid(cols = vars(index)) +
    labs(
      title = glue::glue("{id} | {metadata$biogeo}{metadata$unit} | {metadata$EUNISa_1} | {metadata$EUNISa_2} | Quality: {metadata$quality_flag}"),
      x = "Day of Year",
      y = "Index Value"
    ) +
    theme_minimal() + theme(legend.position = "top")
})

# Name the list by PlotObservationID
names(ts_plots_q0) <- unique_ids0

# Display the first plot
ts_plots_q0[1]
```

Save each plot to a file:

```{r eval=FALSE, include=FALSE}
walk2(ts_plots_q0, seq_along(ts_plots_q0), ~ ggsave(
  filename = paste0("C:/Analyses/MOTIVATE_validation/output/figures/phenology/ts_q0_S2/ts_plots_q0", .y, ".jpeg"),
  plot = .x,
  width = 8,
  height = 5
))
```

# Smooth the time series of NDMI and NDWI

Using GAM, without replacing values in DOY 1–50 and DOY 315–end with separate base values, later use only unweighted GAM.

```{r}
compute_unweighted_fit <- function(
    # Data frame df with index values over time (DOY)
    df, 
    # Name of the vegetation indices columns (e.g., "NDVI", "EVI", "SAVI)
    index_cols = c("NDMI", "NDWI")
) {
  # Initialize list to store results
  fits_list <- list()
  
  # Loop over each index column
  for (index_col in index_cols) {
    df_index <- df %>%
      # Remove rows with missing index values and sort data by DOY
      filter(!is.na(.data[[index_col]])) %>% arrange(DOY)
    
    # Extract x (DOY) and y (index) vectors for modelling
    x <- df_index$DOY
    y <- df_index[[index_col]]
    
    # If there are fewer than 11 observations or all values are NA, skip
    if (length(x) < 11 || all(is.na(y))) {
      next
    }
    
    # Fit GAM (unweighted) with a thin plate spline (bs = "tp")
    # to smooth the index curve
    gam_unweighted <- mgcv::bam(y ~ s(x, bs = "tp"))
    pred <- predict(gam_unweighted, newdata = tibble(x = x))
    
    # Create tibble to store original and predicted index values
    fits_df <- tibble(
      PlotObservationID = unique(df$PlotObservationID),
      DOY = x,
      index = index_col,
      value = pred
    )
    
    fits_list[[index_col]] <- fits_df
  }
  
  if (length(fits_list) == 0) {
    return(tibble())
  }
  
  bind_rows(fits_list)
}
```

Apply the function:

```{r eval=FALSE, include=FALSE}
plan(multisession, workers = parallel::detectCores() - 1)

# Apply the function to each PlotObservationID
execution_time <- system.time({
  with_progress({
    smoothed_data <- data_RS_S2_bands_indices %>%
      group_split(PlotObservationID) %>%
      set_names(map_chr(., ~ as.character(unique(.x$PlotObservationID)))) %>%
      future_map_dfr(~ compute_unweighted_fit(df = .,
                                       index_cols = c("NDMI", "NDWI")),
                     .progress = TRUE)
  })
})

print(execution_time)
```

Look:

```{r}
smoothed_data
```

Save as an object:

```{r eval=FALSE, include=FALSE}
save(smoothed_data, file = "objects/smoothed_data_S2.Rdata")
```

## Plot fit and moments for each PlotObservationID

```{r}
smoothed_data_ids <- smoothed_data %>%
  # Join to get biogeo and unit
  left_join(data_RS_S2_bands_indices %>%
              select(PlotObservationID, biogeo, unit) %>%
              distinct()) %>%
  # Join to get EUNIS info
   left_join(db_Europa_allobs %>%
               select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
                      EUNISa_2, EUNISa_2_descr)) %>%
  mutate(PlotObservationID = as.character(PlotObservationID))
```

```{r eval=FALSE, include=FALSE}
# Get unique PlotObservationIDs
unique_ids <- unique(smoothed_data_ids$PlotObservationID)

# Create and store plots in a list
ts_plots_NDMI_NDWI<- map(unique_ids, function(id) {
  plot_data <- smoothed_data_ids %>% 
    dplyr::filter(PlotObservationID == id)
  
  # Extract metadata for title
  metadata <- plot_data %>%
    select(biogeo, unit, EUNISa_1, EUNISa_2) %>%
    distinct()
  
  ggplot() +
    # Raw data points
     geom_point(data = data_RS_S2_bands_indices %>%
                  select(PlotObservationID, DOY, NDMI, NDWI) %>%
                  pivot_longer(cols = c(NDMI, NDWI), names_to = "index",
                               values_to = "value") %>%
                  filter(PlotObservationID == id),
                aes(x = DOY, y = value), alpha = 0.6) +
    geom_line(data = plot_data, aes(x = DOY, y = value),
              size = 0.5, color = "blue") +
    facet_grid(cols = vars(index)) +
    labs(
      title = glue::glue("{id} | {metadata$biogeo}{metadata$unit} | {metadata$EUNISa_1} | {metadata$EUNISa_2}"),
      x = "Day of Year",
      y = "Index Value"
    ) +
    theme_minimal() + theme(legend.position = "top")
})

# Name the list by PlotObservationID
names(ts_plots_NDMI_NDWI) <- unique_ids

# Display the first plot
print(ts_plots_NDMI_NDWI[[1]])
```

Save each plot to a file:

```{r eval=FALSE, include=FALSE}
walk2(ts_plots_NDMI_NDWI, seq_along(ts_plots_NDMI_NDWI), ~ ggsave(
  filename = paste0("C:/Analyses/MOTIVATE_validation/output/figures/phenology/ts_NDMI_NDWI_S2/ts_plots_NDVI_NDMI",
                    .y, ".jpeg"),
  plot = .x,
  width = 8,
  height = 5
))
```

# Get indices data (max. and min.)

Careful! These maximum and minimum values are from the smoothed time series. For NDVI / EVI / SAVI values in DOY 1–50 and DOY 315–end, remember that the GAM smoothing function replaced the original values with the mean base value of observations during each of these respective periods. This was so far not done for NDMI and NDWI. 

```{r}
final_indices_data <- GAM_data %>%
  group_by(PlotObservationID, index) %>%
  summarise(max = max(value), min = min(value)) %>%
  ungroup() %>%
  pivot_wider(names_from = index, values_from = c(max, min),
              names_glue = "{index}_{.value}") %>%
  full_join(
    smoothed_data %>%
      group_by(PlotObservationID, index) %>%
      summarise(max = max(value), min = min(value)) %>%
      ungroup() %>%
      pivot_wider(names_from = index, values_from = c(max, min),
                  names_glue = "{index}_{.value}")
    )
```

# Get phenology data

Use GAM iter_3 to get dates of the moments, values at those moments and AUC (time-integrated indices) between SOS and EOS:

```{r}
# Join to get values at SOS, POS, EOS and auc
final_phenology_data <- GAM_data %>%
  mutate(
    stage = case_when(
      DOY == sos_slope ~ "sos_slope",
      DOY == sos_threshold ~ "sos_threshold",
      DOY == pos ~ "pos",
      DOY == eos_slope ~ "eos_slope",
      DOY == eos_threshold ~ "eos_threshold",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::filter(!is.na(stage)) %>%
  select(PlotObservationID, index, stage, doy = DOY, value) %>%
  pivot_wider(
    names_from = c(index, stage),
    values_from = c(doy, value),
    names_glue = "{index}_{stage}_{.value}"
  ) %>%
  # Convert list cols to regular numeric cols
  mutate(
    NDVI_sos_slope_value = map_dbl(NDVI_sos_slope_value, 1),
    NDVI_sos_threshold_value = map_dbl(NDVI_sos_threshold_value, 1),
    NDVI_pos_value = map_dbl(NDVI_pos_value, 1),
    NDVI_eos_slope_value = map_dbl(NDVI_eos_slope_value, 1),
    NDVI_eos_threshold_value = map_dbl(NDVI_eos_threshold_value, 1),
    EVI_sos_slope_value = map_dbl(EVI_sos_slope_value, 1),
    EVI_sos_threshold_value = map_dbl(EVI_sos_threshold_value, 1),
    EVI_pos_value = map_dbl(EVI_pos_value, 1),
    EVI_eos_slope_value = map_dbl(EVI_eos_slope_value, 1),
    EVI_eos_threshold_value = map_dbl(EVI_eos_threshold_value, 1),
    SAVI_sos_slope_value = map_dbl(SAVI_sos_slope_value, 1),
    SAVI_sos_threshold_value = map_dbl(SAVI_sos_threshold_value, 1),
    SAVI_pos_value = map_dbl(SAVI_pos_value, 1),
    SAVI_eos_slope_value = map_dbl(SAVI_eos_slope_value, 1),
    SAVI_eos_threshold_value = map_dbl(SAVI_eos_threshold_value, 1)
  ) %>%
  full_join(GAM_data %>%
              distinct(PlotObservationID, index, auc_slope, auc_threshold) %>%
              pivot_wider(names_from = index, values_from = c(auc_slope, auc_threshold),
                          names_glue = "{index}_{.value}"))
```

# Join indices and phenology data

```{r}
final_RS_data <- full_join(
  # Indices data (max and min)
  final_indices_data,
  # Average values of indices per month
  monthly_avg_indices %>%
    pivot_wider(names_from = index, values_from = c(avg_value_01:auc_mar_oct),
                names_glue = "{index}_{.value}")
  ) %>%
  full_join(
    # Phenology data
    final_phenology_data 
    ) %>%
  # Sort cols in alphabetical order
  select(PlotObservationID, sort(names(.)[names(.) != "PlotObservationID"]))
```

# Add EUNIS codes

```{r}
final_RS_data <- final_RS_data %>% left_join(db_Europa_allobs)
```

```{r}
data_RS_S2_bands_indices <- data_RS_S2_bands_indices %>%
  left_join(db_Europa_allobs)
```

# Monthly spectrophenology per habitat type

```{r}
# Prepare the data
data_monthly_EUNISa_1 <- data_RS_S2_bands_indices %>%
  mutate(month = month(date, label = TRUE, abbr = TRUE, locale="EN-us")) %>%
  group_by(month, EUNISa_1, EUNISa_1_descr) %>%
  summarise(
    mean_NDVI = mean(NDVI, na.rm = TRUE),
    sd_NDVI = sd(NDVI, na.rm = TRUE),
    n_NDVI = sum(!is.na(NDVI)),
    mean_EVI = mean(EVI, na.rm = TRUE),
    sd_EVI = sd(EVI, na.rm = TRUE),
    n_EVI = sum(!is.na(EVI)),
    mean_SAVI = mean(SAVI, na.rm = TRUE),
    sd_SAVI = sd(SAVI, na.rm = TRUE),
    n_SAVI = sum(!is.na(SAVI)),
    .groups = "drop"
  )

data_monthly_EUNISa_1 <- data_monthly_EUNISa_1 %>%
  # Add label with n
  left_join(data_monthly_EUNISa_1 %>%
              group_by(EUNISa_1) %>%
              summarise(n_total = sum(n_NDVI, na.rm = TRUE)), 
            by = "EUNISa_1") %>%
  mutate(EUNISa_1_label = paste0(EUNISa_1, " (n = ", n_total, ")"))

data_monthly_EUNISa_2 <- data_RS_S2_bands_indices %>%
  mutate(month = month(date, label = TRUE, abbr = TRUE, locale="EN-us")) %>%
  group_by(month, EUNISa_1, EUNISa_1_descr, EUNISa_2, EUNISa_2_descr) %>%
  summarise(
    mean_NDVI = mean(NDVI, na.rm = TRUE),
    sd_NDVI = sd(NDVI, na.rm = TRUE),
    n_NDVI = sum(!is.na(NDVI)),
    mean_EVI = mean(EVI, na.rm = TRUE),
    sd_EVI = sd(EVI, na.rm = TRUE),
    n_EVI = sum(!is.na(EVI)),
    mean_SAVI = mean(SAVI, na.rm = TRUE),
    sd_SAVI = sd(SAVI, na.rm = TRUE),
    n_SAVI = sum(!is.na(SAVI)),
    .groups = "drop"
  )

data_monthly_EUNISa_2 <- data_monthly_EUNISa_2 %>%
  # Add label with n
  left_join(data_monthly_EUNISa_2 %>%
              group_by(EUNISa_2) %>%
              summarise(n_total = sum(n_NDVI, na.rm = TRUE)), 
            by = "EUNISa_2") %>%
  mutate(EUNISa_2_label = paste0(EUNISa_2, " (n = ", n_total, ")"))
```

## EUNIS level 1

```{r}
# Plots

# EUNISa_1
ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line(aes(group = EUNISa_1)) +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
  #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS1_NDVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS1_EVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

ggplot(data_monthly_EUNISa_1 %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_1_label, EUNISa_1_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_1, EUNISa_1_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, 
           group = EUNISa_1_label)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS1)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS1_SAVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)
```

## EUNIS level 2

### Q

```{r}
# NDVI
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_Q_NDVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

# EVI
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_Q_EVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

# SAVI
ggplot(data_monthly_EUNISa_2 %>%
         dplyr::filter(EUNISa_1 == "Q" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Qa" & EUNISa_2 != "Qb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_Q_SAVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)
```

### R

```{r}
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_R_NDVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_R_EVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "R" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_R_SAVI.jpeg"),
  dpi = 300, width = 7, height = 2.5)
```

### S

```{r}
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_S_NDVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_S_EVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "S" & !is.na(EUNISa_2)) %>%
         # Remove those with EUNIS level 2 that does not match current classif
         dplyr::filter(EUNISa_2 != "Sa" & EUNISa_2 != "Sb") %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_S_SAVI.jpeg"),
  dpi = 300, width = 8, height = 2.5)
```

### T

```{r}
ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_NDVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_NDVI - sd_NDVI, ymax = mean_NDVI + sd_NDVI), 
                #width = 0.2) +
  labs(
    title = "Monthly NDVI by Habitat Type",
    x = "Month",
    y = "NDVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_T_NDVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_EVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_EVI - sd_EVI, ymax = mean_EVI + sd_EVI), 
                #width = 0.2) +
  labs(
    title = "Monthly EVI by Habitat Type",
    x = "Month",
    y = "EVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_T_EVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)

ggplot(data_monthly_EUNISa_2 %>% 
         dplyr::filter(EUNISa_1 == "T" & !is.na(EUNISa_2)) %>%
         # If we want to have n points
         # mutate(EUNIS = paste(EUNISa_2_label, EUNISa_2_descr, sep = " - ")), 
         mutate(EUNIS = paste(EUNISa_2, EUNISa_2_descr, sep = " - ")), 
       aes(x = month, y = mean_SAVI, color = EUNIS, group = EUNIS)) +
  geom_point() +
  geom_line() +
  #geom(aes(ymin = mean_SAVI - sd_SAVI, ymax = mean_SAVI + sd_SAVI), 
                #width = 0.2) +
  labs(
    title = "Monthly SAVI by Habitat Type",
    x = "Month",
    y = "SAVI",
    color = "Habitat (EUNIS2)"
  ) +
  theme_minimal()
ggsave(
  here("output", "figures", "monthly_spectrophenology_S2", "EUNIS2_T_SAVI.jpeg"),
  dpi = 300, width = 6, height = 2.5)
```

# Calculate other phenological metrics

```{r}
final_RS_data <- final_RS_data %>%
  mutate(
    # with slope method
    # Growing season duration
    NDVI_slope_gsd = NDVI_eos_slope_doy - NDVI_sos_slope_doy,
    EVI_slope_gsd = NDVI_eos_slope_doy - NDVI_sos_slope_doy,
    SAVI_slope_gsd = SAVI_eos_slope_doy - SAVI_sos_slope_doy,
    # Difference in value between pos and sos
    NDVI_diff_pos_sos_slope_value = NDVI_pos_value - NDVI_sos_slope_value,
    EVI_diff_pos_sos_slope_value = EVI_pos_value - EVI_sos_slope_value,
    SAVI_diff_pos_sos_slope_value = SAVI_pos_value - SAVI_sos_slope_value,
    # Difference in value between pos and eos
    NDVI_diff_pos_eos_slope_value = NDVI_pos_value - NDVI_eos_slope_value,
    EVI_diff_pos_eos_slope_value = EVI_pos_value - EVI_eos_slope_value,
    SAVI_diff_pos_eos_slope_value = SAVI_pos_value - SAVI_eos_slope_value,
    # Difference in doy between pos and sos
    NDVI_diff_pos_sos_slope_doy = NDVI_pos_doy - NDVI_sos_slope_doy,
    EVI_diff_pos_sos_slope_doy = EVI_pos_doy - EVI_sos_slope_doy,
    SAVI_diff_pos_sos_slope_doy = SAVI_pos_doy - SAVI_sos_slope_doy,
    # Absolute difference in doy between pos and eos
    NDVI_diff_pos_eos_slope_doy = abs(NDVI_pos_doy - NDVI_eos_slope_doy),
    EVI_diff_pos_eos_slope_doy = abs(EVI_pos_doy - EVI_eos_slope_doy),
    SAVI_diff_pos_eos_slope_doy = abs(SAVI_pos_doy - SAVI_eos_slope_doy),
    # With threshold method
    # Growing season duration
    NDVI_threshold_gsd = NDVI_eos_threshold_doy - NDVI_sos_threshold_doy,
    EVI_threshold_gsd = NDVI_eos_threshold_doy - NDVI_sos_threshold_doy,
    SAVI_threshold_gsd = SAVI_eos_threshold_doy - SAVI_sos_threshold_doy,
    # Difference in value between pos and sos
    NDVI_diff_pos_sos_threshold_value =
      NDVI_pos_value - NDVI_sos_threshold_value,
    EVI_diff_pos_sos_threshold_value = 
      EVI_pos_value - EVI_sos_threshold_value,
    SAVI_diff_pos_sos_threshold_value = 
      SAVI_pos_value - SAVI_sos_threshold_value,
    # Difference in value between pos and eos
    NDVI_diff_pos_eos_threshold_value =
      NDVI_pos_value - NDVI_eos_threshold_value,
    EVI_diff_pos_eos_threshold_value = 
      EVI_pos_value - EVI_eos_threshold_value,
    SAVI_diff_pos_eos_threshold_value = 
      SAVI_pos_value - SAVI_eos_threshold_value,
    # Difference in doy between pos and sos
    NDVI_diff_pos_sos_threshold_doy = NDVI_pos_doy - NDVI_sos_threshold_doy,
    EVI_diff_pos_sos_threshold_doy = EVI_pos_doy - EVI_sos_threshold_doy,
    SAVI_diff_pos_sos_threshold_doy = SAVI_pos_doy - SAVI_sos_threshold_doy,
    # Absolute difference in doy between pos and eos
    NDVI_diff_pos_eos_threshold_doy = 
      abs(NDVI_pos_doy - NDVI_eos_threshold_doy),
    EVI_diff_pos_eos_threshold_doy = 
      abs(EVI_pos_doy - EVI_eos_threshold_doy),
    SAVI_diff_pos_eos_threshold_doy = 
      abs(SAVI_pos_doy - SAVI_eos_threshold_doy),
    # With months method
    # Difference in value between pos and march
    NDVI_diff_pos_march_value = NDVI_pos_value - NDVI_avg_value_03,
    EVI_diff_pos_march_value = EVI_pos_value - EVI_avg_value_03,
    SAVI_diff_pos_march_value = SAVI_pos_value - SAVI_avg_value_03,
    # Difference in value between pos and oct
    NDVI_diff_pos_oct_value = NDVI_pos_value - NDVI_avg_value_10,
    EVI_diff_pos_oct_value = EVI_pos_value - EVI_avg_value_10,
    SAVI_diff_pos_oct_value = SAVI_pos_value - SAVI_avg_value_10,
    # Difference in doy between pos and march
    NDVI_diff_pos_march_doy = NDVI_pos_doy - 75, 
    EVI_diff_pos_march_doy = EVI_pos_doy - 75,
    SAVI_diff_pos_march_doy = SAVI_pos_doy - 75,
    # Difference in doy between pos and oct
    NDVI_diff_pos_oct_doy = abs(NDVI_pos_doy - 285),
    EVI_diff_pos_oct_doy = abs(EVI_pos_doy - 285),
    SAVI_diff_pos_oct_doy = abs(SAVI_pos_doy - 285)
  )
```

## Checks

### Slope method

```{r}
# Growing season duration should be positive
nrow(final_RS_data %>% 
       dplyr::filter(NDVI_slope_gsd <= 0 | EVI_slope_gsd <= 0 | 
                       SAVI_slope_gsd <= 0))
# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_slope_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_slope_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_sos_slope_value <= 0))
# Difference in value between pos and eos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_slope_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_eos_slope_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_eos_slope_value <= 0))
# Difference in doy between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_slope_doy <= 0 |
                       EVI_diff_pos_sos_slope_doy <= 0 |
                       SAVI_diff_pos_sos_slope_doy <= 0))
# Difference in doy between eos and pos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_slope_doy <= 0 |
                       EVI_diff_pos_eos_slope_doy <= 0 |
                       SAVI_diff_pos_eos_slope_doy <= 0))
```

### Threshold method

```{r}
# Growing season duration should be positive
nrow(final_RS_data %>% 
       dplyr::filter(NDVI_threshold_gsd <= 0 | EVI_threshold_gsd <= 0 | 
                       SAVI_threshold_gsd <= 0))
# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_threshold_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_threshold_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_sos_threshold_value <= 0))
# Difference in value between pos and eos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_threshold_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_eos_threshold_value <= 0))
nrow(final_RS_data %>%
       dplyr::filter(SAVI_diff_pos_eos_threshold_value <= 0))
# Difference in doy between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_sos_threshold_doy <= 0 |
                       EVI_diff_pos_sos_threshold_doy <= 0 |
                       SAVI_diff_pos_sos_threshold_doy <= 0))
# Difference in doy between eos and pos should be positive
nrow(final_RS_data %>%
       dplyr::filter(NDVI_diff_pos_eos_threshold_doy <= 0 |
                       EVI_diff_pos_eos_threshold_doy <= 0 |
                       SAVI_diff_pos_eos_threshold_doy <= 0))
```

# HERE: Run. Detect number of peaks in smoothed curves

```{r}
# Set up parallel plan
plan(multisession, workers = min(parallel::detectCores() - 1))

# Define peak-counting function
count_peaks <- function(df) {
  # Convert 1D array column to numeric vector
  y <- as.numeric(df$value)
  
  peaks <- findpeaks(y, minpeakdistance = 30, threshold = 0.02)
  
  tibble(
    PlotObservationID = unique(df$PlotObservationID),
    index = unique(df$index),
    num_peaks = if (!is.null(peaks)) nrow(peaks) else 0
  )
}

# Apply peak counting in parallel
peak_counts <- GAM_data %>%
  filter(fit_type == "iter_3") %>%
  arrange(DOY) %>%
  group_by(PlotObservationID, index) %>%
  nest() %>%
  mutate(result = future_map(data, count_peaks, .progress = TRUE,
                             .options = furrr_options(scheduling = Inf))) %>%
  select(-data) %>%
  unnest(result)

# Summarize result
peak_counts_summary <- peak_counts %>% count(index, num_peaks)
```

# HERE: save

```{r}
# # Function to count peaks for each PlotObservationID
# count_peaks <- function(df) {
#   y <- df$value
#   peaks <- findpeaks(y, 
#                      # Minimum number of indices (e.g., DOY steps)
#                      # between two peaks
#                      minpeakdistance = 30, 
#                      # Minimum vertical difference between a peak
#                      # and its surrounding value
#                      threshold = 0.02)
#   num_peaks <- if (!is.null(peaks)) nrow(peaks) else 0
#   return(tibble(PlotObservationID = unique(df$PlotObservationID),
#                 num_peaks = num_peaks))
# }
# 
# # Apply to each group
# peak_counts <- GAM_data %>%
#   mutate(value = map_dbl(value, 1)) %>%
#   dplyr::filter(fit_type == "iter_3") %>%
#   arrange(DOY) %>%
#   group_by(PlotObservationID, index) %>%
#   group_modify(~ count_peaks(.x)) %>%
#   ungroup()
# 
# # View result
# peak_counts %>% count(index, num_peaks)
```

## Plot number of peaks

```{r}
peak_counts %>% count(index, num_peaks) %>%
  ggplot(aes(x = index, y = n, fill = factor(num_peaks))) +
  geom_bar(stat = "identity", position = position_dodge())
```

EVI gives less problems, maybe use only this one?

## Add number of peaks to data

```{r}
final_RS_data <- final_RS_data %>%
  left_join(peak_counts %>%
              pivot_wider(names_from = index, values_from = num_peaks,
                          names_glue = "{index}_{.value}"))
```

## Plot fit and moments for PlotObservationIDs with zero peaks

```{r eval=FALSE, include=FALSE}
# Get unique PlotObservationIDs
unique_ids <- final_RS_data %>%
  dplyr::filter(EVI_num_peaks == 0) %>%
  pull(PlotObservationID) %>%
  as.character()

# Create and store plots in a list
plots_EVI_0peaks <- map(unique_ids[1:50], function(id) {
  plot_data <- GAM_data %>%
    dplyr::filter(as.character(PlotObservationID) == id) %>%
    dplyr::filter(fit_type == "observed" | fit_type == "iter_3")
  
  ggplot() +
    # Raw data points
    geom_point(data = dplyr::filter(plot_data, fit_type == "observed"), 
               aes(x = DOY, y = value), alpha = 0.5) +
    geom_line(data = dplyr::filter(plot_data, fit_type == "iter_3"),
              aes(x = DOY, y = value), size = 0.5, color = "red") +
    geom_vline(data = dplyr::filter(plot_data, fit_type == "iter_3"),
               aes(xintercept = sos),
               linetype = "dashed", size = 0.5, color = "red") +
    geom_vline(data = dplyr::filter(plot_data, fit_type == "iter_3"),
               aes(xintercept = pos),
               linetype = "dotted", size = 0.5, color = "red") +
    geom_vline(data = dplyr::filter(plot_data, fit_type == "iter_3"),
               aes(xintercept = eos),
               linetype = "dashed", size = 0.5, color = "red") +
    facet_grid(cols = vars(index)) +
    labs(
      title = glue::glue("PlotObservationID: {id}"),
      x = "Day of Year",
      y = "Index Value"
    ) +
    theme_minimal() + theme(legend.position = "top")
})

# Name the list by PlotObservationID
names(plots_EVI_0peaks) <- unique_ids[1:50]

# Display the first plot
print(plots_EVI_0peaks[[1]])
```

## Further checks (EVI)

### Slope method

```{r}
# Difference in value between pos and sos should be positive
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_sos_slope_value <= 0))
final_RS_data %>%
  dplyr::filter(EVI_diff_pos_sos_slope_value <= 0) %>%
  count(EVI_num_peaks)
# Difference in value between pos and eos should be positive
nrow(final_RS_data %>%
       dplyr::filter(EVI_diff_pos_eos_slope_value <= 0))
final_RS_data %>%
  dplyr::filter(EVI_diff_pos_eos_slope_value <= 0) %>%
  count(EVI_num_peaks)
```

### Threshold method

# Add some columns needed

```{r}
final_RS_data <- final_RS_data %>%
  left_join(
    data_RS_S2_bands_indices %>%
      distinct(PlotObservationID, year, biogeo, unit, Lctnmth)
    )
```

# Add canopy height data

Read the data:

```{r}
data_RS_CH <- read_csv(
  "C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv")
db_Europa <- read_csv(
  here("..", "DB_first_check", "data", "clean","db_Europa_20250107.csv")
  )
```

```{r}
data_RS_CH_ID <- db_Europa %>%
  select(PlotObservationID, obs_unique_id) %>%
  right_join(data_RS_CH %>%
              # Rename to be able to join on this column
              rename(obs_unique_id = obs_unique)) %>%
  select(PlotObservationID, canopy_height)
```

Join:

```{r}
final_RS_data <- final_RS_data %>%
  left_join(data_RS_CH_ID %>%
              mutate(PlotObservationID = factor(PlotObservationID)))
```

# Save to clean data

```{r}
write_tsv(final_RS_data,
          here("data", "clean","final_RS_data_bands_S2_all_20250804.csv"))
```

# Session info

```{r}
sessionInfo()
```

